## Warning: 程辑包'lubridate'是用R版本4.2.3 来建造的
##
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
d = read.csv("full.csv")
d$day = day(d$ED)
d = d[, !names(d) %in% c("total_load", "pv_est")]
d = d[, !names(d) %in% c("dt", "ACDT", "ED", "DST", "yfrac")]
d = na.omit(d)
head(d)## temp cloud wind wdirection humidity rain radiation demand isDST year month
## 1 20.75 2.5 14.5 135 44.5 0 1915 1288 TRUE 2018 3
## 2 21.50 1.0 16.0 140 40.0 0 2340 1237 TRUE 2018 3
## 3 22.25 1.5 15.5 145 37.0 0 2570 1189 TRUE 2018 3
## 4 23.00 2.0 15.0 150 34.0 0 2800 1150 TRUE 2018 3
## 5 23.55 2.0 13.0 145 32.0 0 2945 1122 TRUE 2018 3
## 6 24.10 2.0 11.0 140 30.0 0 3090 1102 TRUE 2018 3
## hour minutes yday dw day
## 1 8 30 65 2 6
## 2 9 0 65 2 6
## 3 9 30 65 2 6
## 4 10 0 65 2 6
## 5 10 30 65 2 6
## 6 11 0 65 2 6
cor_matrix <- cor(d, method = "pearson")
cor_matrix <- round(cor_matrix, 2)
as.data.frame(cor_matrix)## temp cloud wind wdirection humidity rain radiation demand isDST
## temp 1.00 -0.11 0.25 0.15 -0.73 -0.11 0.52 -0.15 0.53
## cloud -0.11 1.00 0.19 0.12 0.20 0.26 -0.18 0.14 -0.17
## wind 0.25 0.19 1.00 0.35 -0.29 0.18 0.31 -0.14 0.07
## wdirection 0.15 0.12 0.35 1.00 0.03 0.12 0.30 -0.17 0.07
## humidity -0.73 0.20 -0.29 0.03 1.00 0.23 -0.46 0.17 -0.32
## rain -0.11 0.26 0.18 0.12 0.23 1.00 -0.14 0.12 -0.09
## radiation 0.52 -0.18 0.31 0.30 -0.46 -0.14 1.00 -0.56 0.27
## demand -0.15 0.14 -0.14 -0.17 0.17 0.12 -0.56 1.00 -0.25
## isDST 0.53 -0.17 0.07 0.07 -0.32 -0.09 0.27 -0.25 1.00
## year 0.07 -0.02 -0.01 -0.01 0.08 0.02 0.03 -0.10 0.13
## month -0.20 0.09 0.06 0.03 0.03 0.04 0.05 -0.09 0.01
## hour 0.12 -0.07 0.08 0.12 -0.10 0.00 -0.01 0.24 0.00
## minutes 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -0.01 0.00
## yday -0.20 0.08 0.06 0.03 0.03 0.04 0.05 -0.09 0.01
## dw 0.00 0.02 -0.01 0.03 0.04 0.03 0.00 -0.14 0.00
## day 0.01 -0.04 -0.04 -0.01 0.01 -0.01 0.01 0.02 0.00
## year month hour minutes yday dw day
## temp 0.07 -0.20 0.12 0.00 -0.20 0.00 0.01
## cloud -0.02 0.09 -0.07 0.00 0.08 0.02 -0.04
## wind -0.01 0.06 0.08 0.00 0.06 -0.01 -0.04
## wdirection -0.01 0.03 0.12 0.00 0.03 0.03 -0.01
## humidity 0.08 0.03 -0.10 0.00 0.03 0.04 0.01
## rain 0.02 0.04 0.00 0.00 0.04 0.03 -0.01
## radiation 0.03 0.05 -0.01 0.00 0.05 0.00 0.01
## demand -0.10 -0.09 0.24 -0.01 -0.09 -0.14 0.02
## isDST 0.13 0.01 0.00 0.00 0.01 0.00 0.00
## year 1.00 -0.17 0.00 0.00 -0.17 0.00 -0.02
## month -0.17 1.00 0.00 0.00 1.00 0.00 0.01
## hour 0.00 0.00 1.00 0.00 0.00 0.00 0.00
## minutes 0.00 0.00 0.00 1.00 0.00 0.00 0.00
## yday -0.17 1.00 0.00 0.00 1.00 0.00 0.09
## dw 0.00 0.00 0.00 0.00 0.00 1.00 0.00
## day -0.02 0.01 0.00 0.00 0.09 0.00 1.00
## Warning: 程辑包'corrplot'是用R版本4.2.3 来建造的
## corrplot 0.92 loaded
Fit all data before 2023 used for training
# X_train = d[d$year < 2023,][,names(d) != "demand"]
# y_train = d[d$year < 2023,][,names(d) == "demand"]
# X_test = d[d$year >= 2023,][,names(d) != "demand"]
# y_test = d[d$year >= 2023,][,names(d) == "demand"]
fitdata = d[d$year < 2023, ]
tstdata = d[d$year >= 2023, ]
library(randomForest)## Warning: 程辑包'randomForest'是用R版本4.2.3 来建造的
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
mergedata = tstdata
mergedata$preds = predict(rf, mergedata)
mse = function(pre, act){mean((pre - act)^2)}
R2 = function(pre, act) 1 - sum((pre - act)^2) / sum((act - mean(act))^2)
mse_year = mse(mergedata$preds, mergedata$demand)
mse_year## [1] 36284.71
## [1] 0.8090011
mergedata$date <- as.Date(paste("2023", mergedata$month, mergedata$day, sep = "-"))
sum_demand2 <- aggregate((preds-demand)^2 ~ date, mergedata, sum)
colnames(sum_demand2)[2] <- "sum_top"
daily_mean <- aggregate(demand ~ date, mergedata, mean)
mergedata <- merge(mergedata, daily_mean, by = "date", suffixes = c("", "_mean"))
sum_demand3 = aggregate((demand -demand_mean)^2 ~ date, mergedata, sum)
colnames(sum_demand3)[2] <- "sum_bottom"
mix_data = merge(sum_demand2, sum_demand3, by = "date")
mix_data$R2 = 1-mix_data$sum_top/mix_data$sum_bottom
# aggregated_data <- aggregate(. ~ date, mergedata, sum)## Warning: 程辑包'plotly'是用R版本4.2.3 来建造的
## 载入需要的程辑包:ggplot2
## Warning: 程辑包'ggplot2'是用R版本4.2.3 来建造的
##
## 载入程辑包:'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
##
## 载入程辑包:'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# aggregate predictions, actuals and residuals by day
sum_demand3 = mergedata %>% group_by(date) %>%
summarise(
preds_sumbyday = sum(preds),
demand_sumbyday = sum(demand),
rsd_byday = sum(preds) - sum(demand)
)
# sum_demand3
plot_ly(
x = sum_demand3$date, y= sum_demand3$preds_sumbyday,
type = "scatter",
mode = "lines",
name = "2023-Preds") %>%
add_lines(sum_demand3$date, sum_demand3$demand_sumbyday, name = "2023-Actual") %>%
layout(xaxis = list(title = "2023 Date"), yaxis = list(title = "Demand"))mergedata$mse_2 = (mergedata$preds - mergedata$demand)^2
mse_demand = mergedata %>% group_by(date) %>%
summarise(
preds_sumbyday = sum(preds),
demand_sumbyday = sum(demand),
mse_byday = mean(mse_2)
)
# mse_demand
plot_ly(mse_demand, x = ~date, y = ~mse_byday, type = "scatter", mode = "lines")%>%
layout(xaxis = list(title = "2023 Date"), yaxis = list(title = "MSE by day"))Discussions: